Emorie D Beck
my_theme <- function(){
theme_classic() +
theme(
legend.position = "bottom"
, legend.title = element_text(face = "bold", size = rel(1))
, legend.text = element_text(face = "italic", size = rel(1))
, axis.text = element_text(face = "bold", size = rel(1.1), color = "black")
, axis.title = element_text(face = "bold", size = rel(1.2))
, plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5)
, plot.subtitle = element_text(face = "italic", size = rel(1.2), hjust = .5)
, strip.text = element_text(face = "bold", size = rel(1.1), color = "white")
, strip.background = element_rect(fill = "black")
)
}ggplot2 logicshiny)cowplot, so I’m going to teach you thatggplot2 extensions: https://exts.ggplot2.tidyverse.org/gallery/
ggextracowplot (and lots of assortments)ggextraggextra because it will help us create plots with distributions in the margins.cowplotcowplot?
R plots and ggplot2 plotscowplotLet me show you a couple of examples from my work that has used cowplot
plot_grid()cowplot is plot_grid(), which allows us to place differnt figures within the same figure in a grid, and it has a lot of useful argumentsplotlist = NULLalign = c("none", "h", "v", "hv")axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")nrow = NULLncol = NULLrel_widths = 1rel_heights = 1labels = NULLlabel_size = 14label_fontfamily = NULLlabel_fontface = "bold"label_colour = NULLlabel_x = 0label_y = 1hjust = -0.5vjust = 1.5scale = 1greedy = TRUEbyrow = TRUEcols = NULLrows = NULLplot_grid()load(url("https://github.com/emoriebeck/psc290-data-viz-2022/blob/main/04-week4-associations/04-data/week4-data.RData?raw=true"))
pred_data# A tibble: 5,021 × 25
study o_value p_year SID p_value age gender grsWages parEdu race
<chr> <fct> <dbl> <chr> <dbl> <dbl> <fct> <dbl> <fct> <fct>
1 Study1 0 2005 61215 6.67 -29.9 1 1.02 2 0
2 Study1 0 2005 184965 0 -22.9 0 1.14 2 0
3 Study1 0 2005 488251 10 -3.92 1 0.717 1 0
4 Study1 0 2005 650779 7.22 -25.9 1 0.644 3 0
5 Study1 0 2005 969691 7.22 -0.925 1 0.812 2 0
6 Study1 0 2005 986687 6.11 14.1 0 1.76 2 0
7 Study1 0 2005 1054011 5.56 8.08 0 1.34 1 0
8 Study1 0 2005 1372251 7.78 5.08 1 0.842 1 0
9 Study1 0 2005 1496703 6.11 -23.9 0 1.42 2 0
10 Study1 0 2005 1897887 2.78 38.1 1 0.725 2 0
# … with 5,011 more rows, and 15 more variables: physhlthevnt <fct>,
# SRhealth <dbl>, smokes <fct>, alcohol <fct>, exercise <dbl>, BMI <dbl>,
# parDivorce <fct>, PhysFunc <fct>, religion <fct>, education <fct>,
# married <fct>, numKids <dbl>, parOccPrstg <dbl>, reliability <dbl>,
# predInt <dbl>
And remember these models?
tidy_ci <- function(m) tidy(m, conf.int = T)
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data
, ~glm(
o_value ~ p_value
, data = .
, family = binomial(link = "logit")
)
)
, tidy = map(m, tidy_ci)
)
nested_m# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [2 × 7]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [2 × 7]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [2 × 7]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
And remember these models?
Let’s do one small change
m_fun <- function(d) {
glm(o_value ~ p_value + married + married:p_value
, data = d
, family = binomial(link = "logit"))
}
tidy_ci <- function(m) tidy(m, conf.int = T) %>% mutate(df.resid = m$df.residual, n = nrow(m$data))
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data, m_fun)
, tidy = map(m, tidy_ci)
)
nested_m# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
Here’s our unnested model terms
nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp)# A tibble: 24 × 10
study term estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵ df.re…⁶ n
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 Study1 (Interc… 1.33 0.391 0.727 0.467 0.618 2.87 827 831
2 Study1 p_value 0.900 0.0572 -1.84 0.0655 0.804 1.01 827 831
3 Study1 married1 0.926 0.524 -0.147 0.883 0.330 2.59 827 831
4 Study1 p_value… 1.02 0.0745 0.238 0.812 0.880 1.18 827 831
5 Study2 (Interc… 0.705 1.59 -0.220 0.826 0.0250 18.1 992 1000
6 Study2 p_value 1.09 0.218 0.376 0.707 0.702 1.71 992 1000
7 Study2 married1 6.40 1.62 1.14 0.253 0.237 190. 992 1000
8 Study2 p_value… 0.758 0.221 -1.25 0.211 0.478 1.18 992 1000
9 Study3 (Interc… 6.03 1.20 1.49 0.135 0.581 68.2 996 1000
10 Study3 p_value 0.706 0.156 -2.23 0.0256 0.514 0.952 996 1000
# … with 14 more rows, and abbreviated variable names ¹estimate, ²std.error,
# ³statistic, ⁴conf.low, ⁵conf.high, ⁶df.resid
But maybe we are particularly interested in the interaction between marital status and personality in predicting mortality, which we want to plot as a forest plot
nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")# A tibble: 6 × 10
study term estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵ df.re…⁶ n
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 Study1 p_value:… 1.02 0.0745 0.238 0.812 0.880 1.18 827 831
2 Study2 p_value:… 0.758 0.221 -1.25 0.211 0.478 1.18 992 1000
3 Study3 p_value:… 1.21 0.161 1.17 0.242 0.886 1.68 996 1000
4 Study4 p_value:… 0.824 0.284 -0.683 0.495 0.471 1.46 570 574
5 Study5 p_value:… 0.618 0.159 -3.04 0.00239 0.449 0.838 612 616
6 Study6 p_value:… 0.957 0.109 -0.407 0.684 0.773 1.19 996 1000
# … with abbreviated variable names ¹estimate, ²std.error, ³statistic,
# ⁴conf.low, ⁵conf.high, ⁶df.resid
Let’s add our point estimates and uncertainty intervals
p1 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
p1 <- p1 %>%
mutate(study = factor(study, (p1 %>% arrange(desc(estimate)))$study)) %>%
ggplot(aes(x = estimate, y = study)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1Let’s add our point estimates and uncertainty intervals
p2 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
stdy_levs <- tibble(num = 1:6, new = (p2 %>% arrange(desc(estimate)))$study)
p2 <- p2 %>%
arrange(desc(estimate)) %>%
mutate(study = factor(study, stdy_levs$new)
, study2 = 1:n()) %>%
mutate_at(vars(estimate, conf.low, conf.high), ~sprintf("%.2f", .)) %>%
mutate(est = sprintf("%s [%s, %s]", estimate, conf.low, conf.high)
, n = as.character(n)) %>%
select(study, study2, estimate, n, est) %>%
pivot_longer(
cols = c(est, n)
, values_to = "lab"
, names_to = "est"
)
p2# A tibble: 12 × 5
study study2 estimate est lab
<fct> <int> <chr> <chr> <chr>
1 Study3 1 1.21 est 1.21 [0.89, 1.68]
2 Study3 1 1.21 n 1000
3 Study1 2 1.02 est 1.02 [0.88, 1.18]
4 Study1 2 1.02 n 831
5 Study6 3 0.96 est 0.96 [0.77, 1.19]
6 Study6 3 0.96 n 1000
7 Study4 4 0.82 est 0.82 [0.47, 1.46]
8 Study4 4 0.82 n 574
9 Study2 5 0.76 est 0.76 [0.48, 1.18]
10 Study2 5 0.76 n 1000
11 Study5 6 0.62 est 0.62 [0.45, 0.84]
12 Study5 6 0.62 n 616
We added an extra row at the top of the table, so we need to do that for the forest plot, too
p1 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
stdy_levs <- tibble(num = 1:6, new = (p1 %>% arrange(desc(estimate)))$study)
p1 <- p1 %>%
arrange(desc(estimate)) %>%
mutate(study = factor(study, stdy_levs$new)
, study2 = 1:n()) %>%
ggplot(aes(x = estimate, y = study2)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1Add our point estimates and uncertainty intervals, along with the vertical line at OR = 1
Change the y scale back
Add in that top bar
Remove the y axis line
And let’s block out where the dashed line touches the top:
plot_grid()annotate() is a great tool for thisscale_[map]_[type] functions, especially given the labels can be anything we want!theme elements also let us hack many more parts!ggplot2 is simply having lots of tricks up your sleeve, which come from knowledge (and StackOverflow)plot_grid()plotlist = NULLalign = c("none", "h", "v", "hv")axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")nrow = NULLncol = NULLrel_widths = 1rel_heights = 1labels = NULLlabel_size = 14label_fontfamily = NULLlabel_fontface = "bold"label_colour = NULLlabel_x = 0label_y = 1hjust = -0.5vjust = 1.5scale = 1greedy = TRUEbyrow = TRUEcols = NULLrows = NULLplot_grid()plotlist = NULLalign = c("none", "h", "v", "hv")axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")nrow = NULLncol = NULLrel_widths = 1rel_heights = 1labels = NULLlabel_size = 14label_fontfamily = NULLlabel_fontface = "bold"label_colour = NULLlabel_x = 0label_y = 1hjust = -0.5vjust = 1.5scale = 1greedy = TRUEbyrow = TRUEcols = NULLrows = NULLplot_grid()plot_grid()"hv" leads to odd spacingplot_grid()plot_grid()plot_grid()rel_heights
plot_grid(): Labelsplot_grid(): Labelsplot_grid(): Labelsplot_grid(): Labelsplot_grid(): Labelsplot_grid(): titlesIt’d be nice if the title was centered, right?
plot_grid(): titlesIt’d be nice if the title was centered, right?
cowplot also adds some other new tools to our repertoire:
ggdraw()draw_label()draw_plot_label()draw_grob()draw_image()ggdraw() + draw_label()
*ggdraw()is more or a setup function that allows us to add grobs on top * We'll use it withdraw_label()` to make our title (just some text to put on the plot
plot_grid(): titlestitle <- ggdraw() +
draw_label(
"Mortality Odds"
, fontface = 'bold'
, x = .5
, hjust = .5
, y = .8
) +
draw_label(
"Conscientiousness x Marital Status"
, fontface = 'italic'
, x = .5
, hjust = .5
, y = .2
) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7)
)
titleplot_grid(): titles*cowplot also adds some other new tools to our repertoire: + ggdraw() + draw_label()
draw_label()
draw_label() is meant to be a better wrapper for geom_text() that requires less customizationgeom_text() would require 10+ arguments and has no easy application to figures put together with cowplot (or other packages for doing so)draw_plot()
inset <-
pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(y = gender, x = SRhealth, fill = gender)) +
scale_fill_manual(values = c("cornflowerblue", "coral")) +
scale_y_discrete(labels = c("Male", "Female")) +
stat_halfeye(alpha = .8) +
my_theme() +
theme(legend.position = "none") + theme_half_open(12)
p4 <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, SRhealth, fill = gender)) +
geom_point(shape = 21, color = "grey20", size = 3) +
scale_fill_manual(values = c("cornflowerblue", "coral"), labels = c("Male", "Female")) +
my_theme()draw_plot()
draw_image()
We can also add images!
draw_image()
We can also add images!
ggdraw() +
draw_plot(p3) +
draw_image(
"https://github.com/emoriebeck/psc290-data-viz-2022/raw/main/01-week1-intro/02-code/02-images/ucdavis_logo_blue.png"
, x = 1, y = 0.05, hjust = 1, vjust = 1, halign = 1, valign = 1,
width = 0.15
)draw_plot_label()
draw_grob()draw_image()PSC 290 - Data Visualization